From 1f54cd4ac78dd1af48490dcc404bec4adf2876f3 Mon Sep 17 00:00:00 2001 From: FengChen Date: Fri, 21 Oct 2022 15:38:50 +0800 Subject: video_core: Reimplementing the maxwell drawing trigger mechanism --- src/video_core/engines/maxwell_3d.cpp | 257 ++++++++++------------- src/video_core/engines/maxwell_3d.h | 33 +-- src/video_core/macro/macro_hle.cpp | 47 ++--- src/video_core/macro/macro_interpreter.cpp | 2 +- src/video_core/macro/macro_jit_x64.cpp | 2 +- src/video_core/rasterizer_interface.h | 2 +- src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 +- src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 11 +- src/video_core/renderer_vulkan/vk_rasterizer.h | 2 +- 10 files changed, 139 insertions(+), 224 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 89a9d1f5a..b41aa6fc1 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -117,10 +117,15 @@ void Maxwell3D::InitializeRegisterDefaults() { shadow_state = regs; - mme_inline[MAXWELL3D_REG_INDEX(draw.end)] = true; - mme_inline[MAXWELL3D_REG_INDEX(draw.begin)] = true; - mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true; - mme_inline[MAXWELL3D_REG_INDEX(index_buffer.count)] = true; + draw_command[MAXWELL3D_REG_INDEX(draw.end)] = true; + draw_command[MAXWELL3D_REG_INDEX(draw.begin)] = true; + draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.first)] = true; + draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true; + draw_command[MAXWELL3D_REG_INDEX(index_buffer.first)] = true; + draw_command[MAXWELL3D_REG_INDEX(index_buffer.count)] = true; + draw_command[MAXWELL3D_REG_INDEX(index_buffer32_first)] = true; + draw_command[MAXWELL3D_REG_INDEX(index_buffer16_first)] = true; + draw_command[MAXWELL3D_REG_INDEX(index_buffer8_first)] = true; } void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) { @@ -208,25 +213,6 @@ 
void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume return ProcessCBBind(3); case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config): return ProcessCBBind(4); - case MAXWELL3D_REG_INDEX(draw.end): - return DrawArrays(); - case MAXWELL3D_REG_INDEX(index_buffer32_first): - regs.index_buffer.count = regs.index_buffer32_first.count; - regs.index_buffer.first = regs.index_buffer32_first.first; - dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - return DrawArrays(); - case MAXWELL3D_REG_INDEX(index_buffer16_first): - regs.index_buffer.count = regs.index_buffer16_first.count; - regs.index_buffer.first = regs.index_buffer16_first.first; - dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - return DrawArrays(); - case MAXWELL3D_REG_INDEX(index_buffer8_first): - regs.index_buffer.count = regs.index_buffer8_first.count; - regs.index_buffer.first = regs.index_buffer8_first.first; - dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - // a macro calls this one over and over, should it increase instancing? - // Used by Hades and likely other Vulkan games. - return DrawArrays(); case MAXWELL3D_REG_INDEX(topology_override): use_topology_override = true; return; @@ -261,14 +247,13 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector& parameters) // Execute the current macro. macro_engine->Execute(macro_positions[entry], parameters); - if (mme_draw.current_mode != MMEDrawMode::Undefined) { - FlushMMEInlineDraw(); - } + + ProcessDeferredDraw(); } void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { - // It is an error to write to a register other than the current macro's ARG register before it - // has finished execution. + // It is an error to write to a register other than the current macro's ARG register before + // it has finished execution. 
if (executing_macro != 0) { ASSERT(method == executing_macro + 1); } @@ -283,9 +268,16 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register, increase the size of the Regs structure"); - const u32 argument = ProcessShadowRam(method, method_argument); - ProcessDirtyRegisters(method, argument); - ProcessMethodCall(method, argument, method_argument, is_last_call); + if (draw_command[method]) { + regs.reg_array[method] = method_argument; + deferred_draw_method.push_back(method); + } else { + ProcessDeferredDraw(); + + const u32 argument = ProcessShadowRam(method, method_argument); + ProcessDirtyRegisters(method, argument); + ProcessMethodCall(method, argument, method_argument, is_last_call); + } } void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, @@ -326,55 +318,6 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, } } -void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) { - if (mme_draw.current_mode == MMEDrawMode::Undefined) { - if (mme_draw.gl_begin_consume) { - mme_draw.current_mode = expected_mode; - mme_draw.current_count = count; - mme_draw.instance_count = 1; - mme_draw.gl_begin_consume = false; - mme_draw.gl_end_count = 0; - } - return; - } else { - if (mme_draw.current_mode == expected_mode && count == mme_draw.current_count && - mme_draw.instance_mode && mme_draw.gl_begin_consume) { - mme_draw.instance_count++; - mme_draw.gl_begin_consume = false; - return; - } else { - FlushMMEInlineDraw(); - } - } - // Tail call in case it needs to retry. 
- StepInstance(expected_mode, count); -} - -void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) { - if (mme_inline[method]) { - regs.reg_array[method] = method_argument; - if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) || - method == MAXWELL3D_REG_INDEX(index_buffer.count)) { - const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count) - ? MMEDrawMode::Array - : MMEDrawMode::Indexed; - StepInstance(expected_mode, method_argument); - } else if (method == MAXWELL3D_REG_INDEX(draw.begin)) { - mme_draw.instance_mode = - (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) || - (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged); - mme_draw.gl_begin_consume = true; - } else { - mme_draw.gl_end_count++; - } - } else { - if (mme_draw.current_mode != MMEDrawMode::Undefined) { - FlushMMEInlineDraw(); - } - CallMethod(method, method_argument, true); - } -} - void Maxwell3D::ProcessTopologyOverride() { using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology; using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride; @@ -404,41 +347,6 @@ void Maxwell3D::ProcessTopologyOverride() { } } -void Maxwell3D::FlushMMEInlineDraw() { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), - regs.vertex_buffer.count); - ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?"); - ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); - - // Both instance configuration registers can not be set at the same time. 
- ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First || - regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged, - "Illegal combination of instancing parameters"); - - ProcessTopologyOverride(); - - const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; - if (ShouldExecute()) { - rasterizer->Draw(is_indexed, true); - } - - // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if - // the game is trying to draw indexed or direct mode. This needs to be verified on HW still - - // it's possible that it is incorrect and that there is some other register used to specify the - // drawing mode. - if (is_indexed) { - regs.index_buffer.count = 0; - } else { - regs.vertex_buffer.count = 0; - } - mme_draw.current_mode = MMEDrawMode::Undefined; - mme_draw.current_count = 0; - mme_draw.instance_count = 0; - mme_draw.instance_mode = false; - mme_draw.gl_begin_consume = false; - mme_draw.gl_end_count = 0; -} - void Maxwell3D::ProcessMacroUpload(u32 data) { macro_engine->AddCode(regs.load_mme.instruction_ptr++, data); } @@ -576,42 +484,6 @@ void Maxwell3D::ProcessSyncPoint() { } } -void Maxwell3D::DrawArrays() { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), - regs.vertex_buffer.count); - ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?"); - - // Both instance configuration registers can not be set at the same time. - ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First || - regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged, - "Illegal combination of instancing parameters"); - - ProcessTopologyOverride(); - - if (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) { - // Increment the current instance *before* drawing. 
- state.current_instance++; - } else if (regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged) { - // Reset the current instance to 0. - state.current_instance = 0; - } - - const bool is_indexed{regs.index_buffer.count && !regs.vertex_buffer.count}; - if (ShouldExecute()) { - rasterizer->Draw(is_indexed, false); - } - - // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if - // the game is trying to draw indexed or direct mode. This needs to be verified on HW still - - // it's possible that it is incorrect and that there is some other register used to specify the - // drawing mode. - if (is_indexed) { - regs.index_buffer.count = 0; - } else { - regs.vertex_buffer.count = 0; - } -} - std::optional Maxwell3D::GetQueryResult() { switch (regs.report_semaphore.query.report) { case Regs::ReportSemaphore::Report::Payload: @@ -694,4 +566,87 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer->Clear(); } +void Maxwell3D::ProcessDeferredDraw() { + auto method_count = deferred_draw_method.size(); + if (method_count) { + enum class DrawMode { + Undefined, + General, + Instance, + }; + DrawMode draw_mode{DrawMode::Undefined}; + u32 instance_count = 1; + + auto first_method = deferred_draw_method[0]; + if (MAXWELL3D_REG_INDEX(draw.begin) == first_method) { + // The minimum number of methods for drawing must be greater than or equal to + // 3[draw.begin->vertex(index)count->draw.end] to avoid errors in index mode drawing + if (method_count < 3) { + return; + } + draw_mode = + (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) || + (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged) + ? 
DrawMode::Instance + : DrawMode::General; + } else if (MAXWELL3D_REG_INDEX(index_buffer32_first) == first_method || + MAXWELL3D_REG_INDEX(index_buffer16_first) == first_method || + MAXWELL3D_REG_INDEX(index_buffer8_first) == first_method) { + draw_mode = DrawMode::General; + } + + // Drawing will only begin with draw.begin or index_buffer method, other methods directly + // clear + if (draw_mode == DrawMode::Undefined) { + deferred_draw_method.clear(); + return; + } + + if (draw_mode == DrawMode::Instance) { + ASSERT_MSG(deferred_draw_method.size() % 4 == 0, "Instance mode method size error"); + instance_count = static_cast(deferred_draw_method.size()) / 4; + } else { + if (MAXWELL3D_REG_INDEX(index_buffer32_first) == first_method) { + regs.index_buffer.count = regs.index_buffer32_first.count; + regs.index_buffer.first = regs.index_buffer32_first.first; + dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + } else if (MAXWELL3D_REG_INDEX(index_buffer32_first) == first_method) { + regs.index_buffer.count = regs.index_buffer16_first.count; + regs.index_buffer.first = regs.index_buffer16_first.first; + dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + } else if (MAXWELL3D_REG_INDEX(index_buffer32_first) == first_method) { + regs.index_buffer.count = regs.index_buffer8_first.count; + regs.index_buffer.first = regs.index_buffer8_first.first; + dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + } + } + + LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), + regs.vertex_buffer.count); + + ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), + "Both indexed and direct?"); + + // Both instance configuration registers can not be set at the same time. 
+ ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First || + regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged, + "Illegal combination of instancing parameters"); + + ProcessTopologyOverride(); + + const bool is_indexed = regs.index_buffer.count && !regs.vertex_buffer.count; + if (ShouldExecute()) { + rasterizer->Draw(is_indexed, instance_count); + } + + if (is_indexed) { + regs.index_buffer.count = 0; + } else { + regs.vertex_buffer.count = 0; + } + + deferred_draw_method.clear(); + } +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 75e3b868d..1472e8871 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3048,8 +3048,6 @@ public: }; std::array shader_stages; - - u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering. }; State state{}; @@ -3064,11 +3062,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - /// Write the value to the register identified by method. - void CallMethodFromMME(u32 method, u32 method_argument); - - void FlushMMEInlineDraw(); - bool ShouldExecute() const { return execute_on; } @@ -3081,21 +3074,6 @@ public: return *rasterizer; } - enum class MMEDrawMode : u32 { - Undefined, - Array, - Indexed, - }; - - struct MMEDrawState { - MMEDrawMode current_mode{MMEDrawMode::Undefined}; - u32 current_count{}; - u32 instance_count{}; - bool instance_mode{}; - bool gl_begin_consume{}; - u32 gl_end_count{}; - } mme_draw; - struct DirtyState { using Flags = std::bitset::max()>; using Table = std::array; @@ -3164,14 +3142,10 @@ private: /// Handles a write to the CB_BIND register. void ProcessCBBind(size_t stage_index); - /// Handles a write to the VERTEX_END_GL register, triggering a draw. 
- void DrawArrays(); - /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro) void ProcessTopologyOverride(); - // Handles a instance drawcall from MME - void StepInstance(MMEDrawMode expected_mode, u32 count); + void ProcessDeferredDraw(); /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional GetQueryResult(); @@ -3184,8 +3158,6 @@ private: /// Start offsets of each macro in macro_memory std::array macro_positions{}; - std::array mme_inline{}; - /// Macro method that is currently being executed / being fed parameters. u32 executing_macro = 0; /// Parameters that have been submitted to the macro call so far. @@ -3198,6 +3170,9 @@ private: bool execute_on{true}; bool use_topology_override{false}; + + std::array draw_command{}; + std::vector deferred_draw_method; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 8a8adbb42..f896591bf 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -22,35 +22,29 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector& maxwell3d.regs.draw.topology.Assign( static_cast(parameters[0] & 0x3ffffff)); maxwell3d.regs.global_base_instance_index = parameters[5]; - maxwell3d.mme_draw.instance_count = instance_count; maxwell3d.regs.global_base_vertex_index = parameters[3]; maxwell3d.regs.index_buffer.count = parameters[1]; maxwell3d.regs.index_buffer.first = parameters[4]; if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, instance_count); } maxwell3d.regs.index_buffer.count = 0; - maxwell3d.mme_draw.instance_count = 0; - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { - const u32 count = 
(maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); maxwell3d.regs.vertex_buffer.first = parameters[3]; maxwell3d.regs.vertex_buffer.count = parameters[1]; maxwell3d.regs.global_base_instance_index = parameters[4]; maxwell3d.regs.draw.topology.Assign( static_cast(parameters[0])); - maxwell3d.mme_draw.instance_count = count; if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(false, true); + maxwell3d.Rasterizer().Draw(false, instance_count); } maxwell3d.regs.vertex_buffer.count = 0; - maxwell3d.mme_draw.instance_count = 0; - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { @@ -63,24 +57,21 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector& maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.regs.global_base_vertex_index = element_base; maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.mme_draw.instance_count = instance_count; - maxwell3d.CallMethodFromMME(0x8e3, 0x640); - maxwell3d.CallMethodFromMME(0x8e4, element_base); - maxwell3d.CallMethodFromMME(0x8e5, base_instance); + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, element_base, true); + maxwell3d.CallMethod(0x8e5, base_instance, true); maxwell3d.regs.draw.topology.Assign( static_cast(parameters[0])); if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, instance_count); } maxwell3d.regs.vertex_id_base = 0x0; maxwell3d.regs.index_buffer.count = 0; maxwell3d.regs.global_base_vertex_index = 0x0; maxwell3d.regs.global_base_instance_index = 0x0; - maxwell3d.mme_draw.instance_count = 0; - maxwell3d.CallMethodFromMME(0x8e3, 0x640); - maxwell3d.CallMethodFromMME(0x8e4, 0x0); - maxwell3d.CallMethodFromMME(0x8e5, 0x0); - maxwell3d.mme_draw.current_mode = 
Engines::Maxwell3D::MMEDrawMode::Undefined; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0x0, true); + maxwell3d.CallMethod(0x8e5, 0x0, true); } // Multidraw Indirect @@ -91,11 +82,9 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector& maxwell3d.regs.index_buffer.count = 0; maxwell3d.regs.global_base_vertex_index = 0x0; maxwell3d.regs.global_base_instance_index = 0x0; - maxwell3d.mme_draw.instance_count = 0; - maxwell3d.CallMethodFromMME(0x8e3, 0x640); - maxwell3d.CallMethodFromMME(0x8e4, 0x0); - maxwell3d.CallMethodFromMME(0x8e5, 0x0); - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0x0, true); + maxwell3d.CallMethod(0x8e5, 0x0, true); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; }); const u32 start_indirect = parameters[0]; @@ -127,15 +116,13 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector& maxwell3d.regs.index_buffer.count = num_vertices; maxwell3d.regs.global_base_vertex_index = base_vertex; maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.mme_draw.instance_count = instance_count; - maxwell3d.CallMethodFromMME(0x8e3, 0x640); - maxwell3d.CallMethodFromMME(0x8e4, base_vertex); - maxwell3d.CallMethodFromMME(0x8e5, base_instance); + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, base_vertex, true); + maxwell3d.CallMethod(0x8e5, base_instance, true); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, instance_count); } - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } } diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index f670b1bca..c0d32c112 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ 
b/src/video_core/macro/macro_interpreter.cpp @@ -335,7 +335,7 @@ void MacroInterpreterImpl::SetMethodAddress(u32 address) { } void MacroInterpreterImpl::Send(u32 value) { - maxwell3d.CallMethodFromMME(method_address.address, value); + maxwell3d.CallMethod(method_address.address, value, true); // Increment the method address by the method increment. method_address.address.Assign(method_address.address.Value() + method_address.increment.Value()); diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index a302a9603..25c1ce798 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -346,7 +346,7 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { } void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { - maxwell3d->CallMethodFromMME(method_address.address, value); + maxwell3d->CallMethod(method_address.address, value, true); } void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index d2d40884c..1cbfef090 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -40,7 +40,7 @@ public: virtual ~RasterizerInterface() = default; /// Dispatches a draw invocation - virtual void Draw(bool is_indexed, bool is_instanced) = 0; + virtual void Draw(bool is_indexed, u32 instance_count) = 0; /// Clear the current framebuffer virtual void Clear() = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e5c09a969..21bac6ebf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -205,7 +205,7 @@ void RasterizerOpenGL::Clear() { ++num_queued_commands; } -void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { +void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) { 
MICROPROFILE_SCOPE(OpenGL_Drawing); SCOPE_EXIT({ gpu.TickWork(); }); @@ -228,8 +228,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { BeginTransformFeedback(pipeline, primitive_mode); const GLuint base_instance = static_cast(maxwell3d->regs.global_base_instance_index); - const GLsizei num_instances = - static_cast(is_instanced ? maxwell3d->mme_draw.instance_count : 1); + const GLsizei num_instances = static_cast(instance_count); if (is_indexed) { const GLint base_vertex = static_cast(maxwell3d->regs.global_base_vertex_index); const GLsizei num_vertices = static_cast(maxwell3d->regs.index_buffer.count); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 45131b785..c93ba3b42 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -68,7 +68,7 @@ public: StateTracker& state_tracker_); ~RasterizerOpenGL() override; - void Draw(bool is_indexed, bool is_instanced) override; + void Draw(bool is_indexed, u32 instance_count) override; void Clear() override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 47dfb45a1..9a7d90b2a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -127,11 +127,10 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3 return scissor; } -DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, - bool is_indexed) { +DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_indexed) { DrawParams params{ .base_instance = regs.global_base_instance_index, - .num_instances = is_instanced ? num_instances : 1, + .num_instances = num_instances, .base_vertex = is_indexed ? 
regs.global_base_vertex_index : regs.vertex_buffer.first, .num_vertices = is_indexed ? regs.index_buffer.count : regs.vertex_buffer.count, .first_index = is_indexed ? regs.index_buffer.first : 0, @@ -177,7 +176,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra RasterizerVulkan::~RasterizerVulkan() = default; -void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { +void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { MICROPROFILE_SCOPE(Vulkan_Drawing); SCOPE_EXIT({ gpu.TickWork(); }); @@ -199,8 +198,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { UpdateDynamicStates(); const auto& regs{maxwell3d->regs}; - const u32 num_instances{maxwell3d->mme_draw.instance_count}; - const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; + const u32 num_instances{instance_count}; + const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_indexed)}; scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { if (draw_params.is_indexed) { cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 4cde3c983..b3a182588 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -64,7 +64,7 @@ public: StateTracker& state_tracker_, Scheduler& scheduler_); ~RasterizerVulkan() override; - void Draw(bool is_indexed, bool is_instanced) override; + void Draw(bool is_indexed, u32 instance_count) override; void Clear() override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; -- cgit v1.2.3 From 2f90694797e30088820937855acf613bdcf27247 Mon Sep 17 00:00:00 2001 From: FengChen Date: Fri, 21 Oct 2022 19:14:22 +0800 Subject: video_core: Implement maxwell inline_index method --- src/video_core/engines/maxwell_3d.cpp | 160 +++++++++++++---------- 
src/video_core/engines/maxwell_3d.h | 13 +- src/video_core/renderer_opengl/gl_rasterizer.cpp | 12 ++ src/video_core/renderer_opengl/gl_rasterizer.h | 2 + src/video_core/renderer_vulkan/vk_rasterizer.cpp | 15 +++ src/video_core/renderer_vulkan/vk_rasterizer.h | 2 + 6 files changed, 130 insertions(+), 74 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index b41aa6fc1..25fcdb1e3 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -126,6 +126,9 @@ void Maxwell3D::InitializeRegisterDefaults() { draw_command[MAXWELL3D_REG_INDEX(index_buffer32_first)] = true; draw_command[MAXWELL3D_REG_INDEX(index_buffer16_first)] = true; draw_command[MAXWELL3D_REG_INDEX(index_buffer8_first)] = true; + draw_command[MAXWELL3D_REG_INDEX(draw_inline_index)] = true; + draw_command[MAXWELL3D_REG_INDEX(inline_index_2x16.even)] = true; + draw_command[MAXWELL3D_REG_INDEX(inline_index_4x8.index0)] = true; } void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) { @@ -271,6 +274,23 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { if (draw_command[method]) { regs.reg_array[method] = method_argument; deferred_draw_method.push_back(method); + auto u32_to_u8 = [&](const u32 argument) { + inline_index_draw_indexes.push_back(static_cast(argument & 0x000000ff)); + inline_index_draw_indexes.push_back(static_cast((argument & 0x0000ff00) >> 8)); + inline_index_draw_indexes.push_back(static_cast((argument & 0x00ff0000) >> 16)); + inline_index_draw_indexes.push_back(static_cast((argument & 0xff000000) >> 24)); + }; + if (MAXWELL3D_REG_INDEX(draw_inline_index) == method) { + u32_to_u8(method_argument); + } else if (MAXWELL3D_REG_INDEX(inline_index_2x16.even) == method) { + u32_to_u8(regs.inline_index_2x16.even); + u32_to_u8(regs.inline_index_2x16.odd); + } else if (MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == method) { + 
u32_to_u8(regs.inline_index_4x8.index0); + u32_to_u8(regs.inline_index_4x8.index1); + u32_to_u8(regs.inline_index_4x8.index2); + u32_to_u8(regs.inline_index_4x8.index3); + } } else { ProcessDeferredDraw(); @@ -567,86 +587,94 @@ void Maxwell3D::ProcessClearBuffers() { } void Maxwell3D::ProcessDeferredDraw() { - auto method_count = deferred_draw_method.size(); - if (method_count) { - enum class DrawMode { - Undefined, - General, - Instance, - }; - DrawMode draw_mode{DrawMode::Undefined}; - u32 instance_count = 1; - - auto first_method = deferred_draw_method[0]; - if (MAXWELL3D_REG_INDEX(draw.begin) == first_method) { - // The minimum number of methods for drawing must be greater than or equal to - // 3[draw.begin->vertex(index)count->draw.end] to avoid errors in index mode drawing - if (method_count < 3) { - return; - } - draw_mode = - (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) || - (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged) - ? DrawMode::Instance - : DrawMode::General; - } else if (MAXWELL3D_REG_INDEX(index_buffer32_first) == first_method || - MAXWELL3D_REG_INDEX(index_buffer16_first) == first_method || - MAXWELL3D_REG_INDEX(index_buffer8_first) == first_method) { - draw_mode = DrawMode::General; - } + if (deferred_draw_method.empty()) { + return; + } + + enum class DrawMode { + Undefined, + General, + Instance, + }; + DrawMode draw_mode{DrawMode::Undefined}; + u32 instance_count = 1; - // Drawing will only begin with draw.begin or index_buffer method, other methods directly - // clear - if (draw_mode == DrawMode::Undefined) { - deferred_draw_method.clear(); + auto first_method = deferred_draw_method[0]; + if (MAXWELL3D_REG_INDEX(draw.begin) == first_method) { + // The minimum number of methods for drawing must be greater than or equal to + // 3[draw.begin->vertex(index)count->draw.end] to avoid errors in index mode drawing + if (deferred_draw_method.size() < 3) { return; } + draw_mode = 
(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) || + (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged) + ? DrawMode::Instance + : DrawMode::General; + } else if (MAXWELL3D_REG_INDEX(index_buffer32_first) == first_method || + MAXWELL3D_REG_INDEX(index_buffer16_first) == first_method || + MAXWELL3D_REG_INDEX(index_buffer8_first) == first_method) { + draw_mode = DrawMode::General; + } + + // Drawing will only begin with draw.begin or index_buffer method, other methods directly + // clear + if (draw_mode == DrawMode::Undefined) { + deferred_draw_method.clear(); + return; + } - if (draw_mode == DrawMode::Instance) { - ASSERT_MSG(deferred_draw_method.size() % 4 == 0, "Instance mode method size error"); - instance_count = static_cast(deferred_draw_method.size()) / 4; + if (draw_mode == DrawMode::Instance) { + ASSERT_MSG(deferred_draw_method.size() % 4 == 0, "Instance mode method size error"); + instance_count = static_cast(deferred_draw_method.size()) / 4; + } else { + if (MAXWELL3D_REG_INDEX(index_buffer32_first) == first_method) { + regs.index_buffer.count = regs.index_buffer32_first.count; + regs.index_buffer.first = regs.index_buffer32_first.first; + dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + } else if (MAXWELL3D_REG_INDEX(index_buffer16_first) == first_method) { + regs.index_buffer.count = regs.index_buffer16_first.count; + regs.index_buffer.first = regs.index_buffer16_first.first; + dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + } else if (MAXWELL3D_REG_INDEX(index_buffer8_first) == first_method) { + regs.index_buffer.count = regs.index_buffer8_first.count; + regs.index_buffer.first = regs.index_buffer8_first.first; + dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; } else { - if (MAXWELL3D_REG_INDEX(index_buffer32_first) == first_method) { - regs.index_buffer.count = regs.index_buffer32_first.count; - regs.index_buffer.first = regs.index_buffer32_first.first; - 
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - } else if (MAXWELL3D_REG_INDEX(index_buffer32_first) == first_method) { - regs.index_buffer.count = regs.index_buffer16_first.count; - regs.index_buffer.first = regs.index_buffer16_first.first; - dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - } else if (MAXWELL3D_REG_INDEX(index_buffer32_first) == first_method) { - regs.index_buffer.count = regs.index_buffer8_first.count; - regs.index_buffer.first = regs.index_buffer8_first.first; - dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + auto second_method = deferred_draw_method[1]; + if (MAXWELL3D_REG_INDEX(draw_inline_index) == second_method || + MAXWELL3D_REG_INDEX(inline_index_2x16.even) == second_method || + MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == second_method) { + regs.index_buffer.count = static_cast(inline_index_draw_indexes.size() / 4); + regs.index_buffer.format = Regs::IndexFormat::UnsignedInt; } } + } - LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), - regs.vertex_buffer.count); + LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), + regs.vertex_buffer.count); - ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), - "Both indexed and direct?"); + ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?"); - // Both instance configuration registers can not be set at the same time. - ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First || - regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged, - "Illegal combination of instancing parameters"); + // Both instance configuration registers can not be set at the same time. 
+ ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First || + regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged, + "Illegal combination of instancing parameters"); - ProcessTopologyOverride(); + ProcessTopologyOverride(); - const bool is_indexed = regs.index_buffer.count && !regs.vertex_buffer.count; - if (ShouldExecute()) { - rasterizer->Draw(is_indexed, instance_count); - } - - if (is_indexed) { - regs.index_buffer.count = 0; - } else { - regs.vertex_buffer.count = 0; - } + const bool is_indexed = regs.index_buffer.count && !regs.vertex_buffer.count; + if (ShouldExecute()) { + rasterizer->Draw(is_indexed, instance_count); + } - deferred_draw_method.clear(); + if (is_indexed) { + regs.index_buffer.count = 0; + } else { + regs.vertex_buffer.count = 0; } + + deferred_draw_method.clear(); + inline_index_draw_indexes.clear(); } } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1472e8871..bd23ebc12 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1739,14 +1739,11 @@ public: Footprint_1x1_Virtual = 2, }; - struct InlineIndex4x8Align { + struct InlineIndex4x8 { union { BitField<0, 30, u32> count; BitField<30, 2, u32> start; }; - }; - - struct InlineIndex4x8Index { union { BitField<0, 8, u32> index0; BitField<8, 8, u32> index1; @@ -2836,8 +2833,7 @@ public: u32 depth_write_enabled; ///< 0x12E8 u32 alpha_test_enabled; ///< 0x12EC INSERT_PADDING_BYTES_NOINIT(0x10); - InlineIndex4x8Align inline_index_4x8_align; ///< 0x1300 - InlineIndex4x8Index inline_index_4x8_index; ///< 0x1304 + InlineIndex4x8 inline_index_4x8; ///< 0x1300 D3DCullMode d3d_cull_mode; ///< 0x1308 ComparisonOp depth_test_func; ///< 0x130C f32 alpha_test_ref; ///< 0x1310 @@ -3083,6 +3079,8 @@ public: Tables tables{}; } dirty; + std::vector inline_index_draw_indexes; + private: void InitializeRegisterDefaults(); @@ -3377,8 +3375,7 @@ 
ASSERT_REG_POSITION(alpha_to_coverage_dither, 0x12E0); ASSERT_REG_POSITION(blend_per_target_enabled, 0x12E4); ASSERT_REG_POSITION(depth_write_enabled, 0x12E8); ASSERT_REG_POSITION(alpha_test_enabled, 0x12EC); -ASSERT_REG_POSITION(inline_index_4x8_align, 0x1300); -ASSERT_REG_POSITION(inline_index_4x8_index, 0x1304); +ASSERT_REG_POSITION(inline_index_4x8, 0x1300); ASSERT_REG_POSITION(d3d_cull_mode, 0x1308); ASSERT_REG_POSITION(depth_test_func, 0x130C); ASSERT_REG_POSITION(alpha_test_ref, 0x1310); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 21bac6ebf..1590b21de 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -222,6 +222,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) { pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->Configure(is_indexed); + BindInlineIndexBuffer(); + SyncState(); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d->regs.draw.topology); @@ -1128,6 +1130,16 @@ void RasterizerOpenGL::ReleaseChannel(s32 channel_id) { query_cache.EraseChannel(channel_id); } +void RasterizerOpenGL::BindInlineIndexBuffer() { + if (maxwell3d->inline_index_draw_indexes.empty()) { + return; + } + const auto data_count = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size()); + auto buffer = Buffer(buffer_cache_runtime, *this, 0, data_count); + buffer.ImmediateUpload(0, maxwell3d->inline_index_draw_indexes); + buffer_cache_runtime.BindIndexBuffer(buffer, 0, data_count); +} + AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index c93ba3b42..793e0d608 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++
b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -199,6 +199,8 @@ private: /// End a transform feedback void EndTransformFeedback(); + void BindInlineIndexBuffer(); + Tegra::GPU& gpu; const Device& device; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9a7d90b2a..9f05a7a18 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -193,6 +193,8 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->Configure(is_indexed); + BindInlineIndexBuffer(); + BeginTransformFeedback(); UpdateDynamicStates(); @@ -1008,4 +1010,17 @@ void RasterizerVulkan::ReleaseChannel(s32 channel_id) { query_cache.EraseChannel(channel_id); } +void RasterizerVulkan::BindInlineIndexBuffer() { + if (maxwell3d->inline_index_draw_indexes.empty()) { + return; + } + const auto data_count = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size()); + auto buffer = buffer_cache_runtime.UploadStagingBuffer(data_count); + std::memcpy(buffer.mapped_span.data(), maxwell3d->inline_index_draw_indexes.data(), data_count); + buffer_cache_runtime.BindIndexBuffer( + maxwell3d->regs.draw.topology, maxwell3d->regs.index_buffer.format, + maxwell3d->regs.index_buffer.first, maxwell3d->regs.index_buffer.count, buffer.buffer, + static_cast<u32>(buffer.offset), data_count); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b3a182588..e2fdc7611 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -141,6 +141,8 @@ private: void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); + void BindInlineIndexBuffer(); + Tegra::GPU& gpu; ScreenInfo& screen_info; -- cgit v1.2.3